% Book chapter by Bengio and LeCun in "Large Scale Kernel Machines" (MIT Press, 2007).
@incollection{Bengio+chapter2007,
  author    = {Bengio, Yoshua and LeCun, Yann},
  title     = {Scaling Learning Algorithms Towards {AI}},
  booktitle = {Large Scale Kernel Machines},
  publisher = {MIT Press},
  year      = {2007},
}
% Hinton, Osindero and Teh: deep belief nets, Neural Computation 18(7), 2006.
% The title is stored in conventional title case (lowercase "for") and the
% issue number is recorded alongside the volume.
@article{Hinton06,
  author  = {Hinton, Geoffrey E. and Osindero, Simon and Teh, Yee Whye},
  title   = {A Fast Learning Algorithm for Deep Belief Nets},
  journal = {Neural Computation},
  volume  = {18},
  number  = {7},
  pages   = {1527--1554},
  year    = {2006},
}
% MT-Bench / Chatbot Arena paper, NeurIPS 2023 (volume 36).
% Acronyms and benchmark names in the title are braced so that sentence-casing
% bibliography styles cannot lowercase them; the booktitle spelling matches the
% other NeurIPS entries in this file.
@inproceedings{NEURIPS2023_91f18a12,
  author    = {Zheng, Lianmin and Chiang, Wei-Lin and Sheng, Ying and Zhuang, Siyuan and Wu, Zhanghao and Zhuang, Yonghao and Lin, Zi and Li, Zhuohan and Li, Dacheng and Xing, Eric and Zhang, Hao and Gonzalez, Joseph E and Stoica, Ion},
  title     = {Judging {LLM}-as-a-Judge with {MT-Bench} and {Chatbot Arena}},
  booktitle = {Advances in Neural Information Processing Systems},
  editor    = {A. Oh and T. Naumann and A. Globerson and K. Saenko and M. Hardt and S. Levine},
  volume    = {36},
  pages     = {46595--46623},
  publisher = {Curran Associates, Inc.},
  year      = {2023},
  url       = {https://proceedings.neurips.cc/paper_files/paper/2023/file/91f18a1287b398d378ef22505bf41832-Paper-Datasets_and_Benchmarks.pdf},
}
% arXiv preprint 2410.10343.
% {LLMs} is braced so sentence-casing styles keep the acronym in capitals.
@article{zhu2024locking,
  author  = {Zhu, Minjun and Yang, Linyi and Wei, Yifan and Zhang, Ningyu and Zhang, Yue},
  title   = {Locking Down the Finetuned {LLMs} Safety},
  journal = {arXiv preprint arXiv:2410.10343},
  year    = {2024},
}

% AgentReview, EMNLP 2024 main conference (ACL Anthology 2024.emnlp-main.70).
% Case-sensitive words are braced as whole words rather than letter by letter,
% which gives the same capitalisation without splitting the word for kerning.
@inproceedings{jin2024agentreviewexploringpeerreview,
  author    = {Jin, Yiqiao and Zhao, Qinlin and Wang, Yiyang and Chen, Hao and Zhu, Kaijie and Xiao, Yijia and Wang, Jindong},
  title     = {{AgentReview}: Exploring Peer Review Dynamics with {LLM} Agents},
  editor    = {Al-Onaizan, Yaser and Bansal, Mohit and Chen, Yun-Nung},
  booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2024},
  address   = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1208--1226},
  url       = {https://aclanthology.org/2024.emnlp-main.70},
}
% Retrieval-Augmented Generation paper, NeurIPS 2020 (volume 33).
% Umlauts are written as BibTeX special characters ({\"u}, {\"a}) so they
% typeset and sort correctly; {NLP} is braced against sentence-casing styles.
@inproceedings{NEURIPS2020_6b493230,
  author    = {Lewis, Patrick and Perez, Ethan and Piktus, Aleksandra and Petroni, Fabio and Karpukhin, Vladimir and Goyal, Naman and K{\"u}ttler, Heinrich and Lewis, Mike and Yih, Wen-tau and Rockt{\"a}schel, Tim and Riedel, Sebastian and Kiela, Douwe},
  title     = {Retrieval-Augmented Generation for Knowledge-Intensive {NLP} Tasks},
  booktitle = {Advances in Neural Information Processing Systems},
  editor    = {H. Larochelle and M. Ranzato and R. Hadsell and M.F. Balcan and H. Lin},
  volume    = {33},
  pages     = {9459--9474},
  publisher = {Curran Associates, Inc.},
  year      = {2020},
  url       = {https://proceedings.neurips.cc/paper_files/paper/2020/file/6b493230205f780e1bc26945df7481e5-Paper.pdf},
}
% arXiv preprint 2403.13787.
% The benchmark name is spelled with its camel-case capital and braced so that
% no bibliography style can flatten it.
@article{lambert2024rewardbench,
  author  = {Lambert, Nathan and Pyatkin, Valentina and Morrison, Jacob and Miranda, LJ and Lin, Bill Yuchen and Chandu, Khyathi and Dziri, Nouha and Kumar, Sachin and Zick, Tom and Choi, Yejin and others},
  title   = {{RewardBench}: Evaluating reward models for language modeling},
  journal = {arXiv preprint arXiv:2403.13787},
  year    = {2024},
}
% Gemini 1.5 technical report, arXiv preprint 2403.05530.
% The model name is braced against sentence-casing styles, and the hyphenated
% given name "Jean-Baptiste" is capitalised on both parts.
@article{reid2024gemini,
  author  = {Reid, Machel and Savinov, Nikolay and Teplyashin, Denis and Lepikhin, Dmitry and Lillicrap, Timothy and Alayrac, Jean-Baptiste and Soricut, Radu and Lazaridou, Angeliki and Firat, Orhan and Schrittwieser, Julian and others},
  title   = {{Gemini} 1.5: Unlocking multimodal understanding across millions of tokens of context},
  journal = {arXiv preprint arXiv:2403.05530},
  year    = {2024},
}
% GPT-4 technical report, arXiv preprint 2303.08774.
% The model name is protected with plain braces; no formatting macro is used,
% so the entry compiles without any extra LaTeX package.
@article{achiam2023gpt,
  author  = {Achiam, Josh and Adler, Steven and Agarwal, Sandhini and Ahmad, Lama and Akkaya, Ilge and Aleman, Florencia Leoni and Almeida, Diogo and Altenschmidt, Janko and Altman, Sam and Anadkat, Shyamal and others},
  title   = {{GPT-4} Technical Report},
  journal = {arXiv preprint arXiv:2303.08774},
  year    = {2023},
}
% arXiv preprint 2401.04259.
% MARG is an acronym, so it is written in capitals and braced.
@article{d2024marg,
  author  = {D'Arcy, Mike and Hope, Tom and Birnbaum, Larry and Downey, Doug},
  title   = {{MARG}: Multi-agent review generation for scientific papers},
  journal = {arXiv preprint arXiv:2401.04259},
  year    = {2024},
}
% NEJM AI 1(8), 2024; the pages field holds the article identifier.
% The subtitle begins after a question mark, where sentence-casing styles do
% not preserve the capital by themselves, so its first word is braced.
@article{liang2024can,
  author    = {Liang, Weixin and Zhang, Yuhui and Cao, Hancheng and Wang, Binglu and Ding, Daisy Yi and Yang, Xinyu and Vodrahalli, Kailas and He, Siyu and Smith, Daniel Scott and Yin, Yian and others},
  title     = {Can large language models provide useful feedback on research papers? {A} large-scale empirical analysis},
  journal   = {NEJM AI},
  volume    = {1},
  number    = {8},
  pages     = {AIoa2400196},
  year      = {2024},
  publisher = {Massachusetts Medical Society},
}
% Paper in the Companion Proceedings of the Web Conference 2021.
@inproceedings{bao2021predicting,
  author    = {Bao, Peng and Hong, Weihui and Li, Xuanya},
  title     = {Predicting Paper Acceptance Via Interpretable Decision Sets},
  booktitle = {Companion Proceedings of the Web Conference 2021},
  pages     = {461--467},
  year      = {2021},
}
% Chain-of-thought prompting, NeurIPS 2022 (volume 35).
% Stored as an inproceedings entry with the proceedings name in booktitle, the
% same shape as the other NeurIPS entries in this file.
@inproceedings{wei2022chain,
  author    = {Wei, Jason and Wang, Xuezhi and Schuurmans, Dale and Bosma, Maarten and Xia, Fei and Chi, Ed and Le, Quoc V and Zhou, Denny and others},
  title     = {Chain-of-thought prompting elicits reasoning in large language models},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {35},
  pages     = {24824--24837},
  year      = {2022},
}
% arXiv preprint 2406.05688 (cs.CL). Author names use the "Last, First" form.
@misc{tan2024peerreviewmultiturnlongcontext,
  author        = {Tan, Cheng and Lyu, Dongxin and Li, Siyuan and Gao, Zhangyang and Wei, Jingxuan and Ma, Siqi and Liu, Zicheng and Li, Stan Z.},
  title         = {Peer Review as A Multi-Turn and Long-Context Dialogue with Role-Based Interactions},
  year          = {2024},
  eprint        = {2406.05688},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2406.05688},
}
% Self-verification paper, Findings of EMNLP 2023 (ACL Anthology export).
% Field values are brace-delimited; the abstract is carried over unchanged.
@inproceedings{weng-etal-2023-large,
  author    = {Weng, Yixuan and Zhu, Minjun and Xia, Fei and Li, Bin and He, Shizhu and Liu, Shengping and Sun, Bin and Liu, Kang and Zhao, Jun},
  title     = {Large Language Models are Better Reasoners with Self-Verification},
  editor    = {Bouamor, Houda and Pino, Juan and Bali, Kalika},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2023},
  month     = dec,
  year      = {2023},
  address   = {Singapore},
  publisher = {Association for Computational Linguistics},
  pages     = {2550--2575},
  doi       = {10.18653/v1/2023.findings-emnlp.167},
  url       = {https://aclanthology.org/2023.findings-emnlp.167},
  abstract  = {Recently, with the chain of thought (CoT) prompting, large language models (LLMs), e.g., GPT-3, have shown strong reasoning ability in several natural language processing tasks such as arithmetic, commonsense, and logical reasoning. However, LLMs with CoT require multi-step prompting and multi-token prediction, which is highly sensitive to individual mistakes and vulnerable to error accumulation. The above issues make the LLMs need the ability to verify the answers. In fact, after inferring conclusions in some thinking decision tasks, people often check them by re-verifying steps to avoid some mistakes. In this paper, we propose and prove that LLMs also have similar self-verification abilities. We take the conclusion obtained by CoT as one of the conditions for solving the original problem. By performing a backward verification of the answers that LLM deduced for itself, we can obtain interpretable answer validation scores to select the candidate answer with the highest score. Experimental results demonstrate that the proposed method can improve the reasoning performance on various arithmetic, commonsense, and logical reasoning datasets. Our code is publicly available at: https://github.com/WENGSYX/Self-Verification.},
}


% Reflexion, NeurIPS 2023 (volume 36).
@inproceedings{shinn2023reflexion,
  author    = {Shinn, Noah and Cassano, Federico and Gopinath, Ashwin and Narasimhan, Karthik and Yao, Shunyu},
  title     = {Reflexion: language agents with verbal reinforcement learning},
  booktitle = {Advances in Neural Information Processing Systems},
  editor    = {A. Oh and T. Naumann and A. Globerson and K. Saenko and M. Hardt and S. Levine},
  volume    = {36},
  pages     = {8634--8652},
  publisher = {Curran Associates, Inc.},
  year      = {2023},
  url       = {https://proceedings.neurips.cc/paper_files/paper/2023/file/1b44b878bb782e6954cd888628510e90-Paper-Conference.pdf},
}

% arXiv preprint 2408.15240.
@article{zhang2024generative,
  author  = {Zhang, Lunjun and Hosseini, Arian and Bansal, Hritik and Kazemi, Mehran and Kumar, Aviral and Agarwal, Rishabh},
  title   = {Generative Verifiers: Reward Modeling as Next-Token Prediction},
  journal = {arXiv preprint arXiv:2408.15240},
  year    = {2024},
}
% arXiv preprint 2307.05492.
% The model name is written in capitals and braced so it survives any
% style-applied recasing.
@article{robertson2023gpt4,
  author  = {Robertson, Zachary},
  title   = {{GPT4} is slightly helpful for peer-review assistance: A pilot study},
  journal = {arXiv preprint arXiv:2307.05492},
  year    = {2023},
}

% FP8 quantization paper, NeurIPS 2022 (volume 35).
% {FP8} is braced: sentence-casing styles keep only the first letter of a
% title capitalised and would otherwise print "Fp8".
@inproceedings{kuzmin2024fp8quantizationpowerexponent,
  author    = {Kuzmin, Andrey and van Baalen, Mart and Ren, Yuwei and Nagel, Markus and Peters, Jorn and Blankevoort, Tijmen},
  title     = {{FP8} Quantization: The Power of the Exponent},
  booktitle = {Advances in Neural Information Processing Systems},
  editor    = {S. Koyejo and S. Mohamed and A. Agarwal and D. Belgrave and K. Cho and A. Oh},
  volume    = {35},
  pages     = {14651--14662},
  publisher = {Curran Associates, Inc.},
  year      = {2022},
  url       = {https://proceedings.neurips.cc/paper_files/paper/2022/file/5e07476b6bd2497e1fbd11b8f0b2de3c-Paper-Conference.pdf},
}

% LoRA, ICLR 2022 (OpenReview export). Author names use the "Last, First" form.
@inproceedings{hu2022lora,
  author    = {Hu, Edward J and Shen, Yelong and Wallis, Phillip and Allen-Zhu, Zeyuan and Li, Yuanzhi and Wang, Shean and Wang, Lu and Chen, Weizhu},
  title     = {Lo{RA}: Low-Rank Adaptation of Large Language Models},
  booktitle = {International Conference on Learning Representations},
  year      = {2022},
  url       = {https://openreview.net/forum?id=nZeVKeeFYf9},
}
% arXiv preprint 2407.05000 (cs.LG).
% The method name is braced so sentence-casing styles do not print "Lora-ga".
@misc{wang2024loragalowrankadaptationgradient,
  author        = {Shaowen Wang and Linxi Yu and Jian Li},
  title         = {{LoRA-GA}: Low-Rank Adaptation with Gradient Approximation},
  year          = {2024},
  eprint        = {2407.05000},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2407.05000},
}
% DeepSpeed tutorial paper, KDD '20 (ACM Digital Library export).
% The page range uses the ASCII double hyphen rather than a Unicode en dash,
% which classic 8-bit BibTeX does not handle; product names and acronyms are
% braced, and the booktitle uses conventional capitalisation.
@inproceedings{10.1145/3394486.3406703,
  author    = {Rasley, Jeff and Rajbhandari, Samyam and Ruwase, Olatunji and He, Yuxiong},
  title     = {{DeepSpeed}: System Optimizations Enable Training Deep Learning Models with Over 100 Billion Parameters},
  booktitle = {Proceedings of the 26th {ACM} {SIGKDD} International Conference on Knowledge Discovery \& Data Mining},
  series    = {KDD '20},
  location  = {Virtual Event, CA, USA},
  pages     = {3505--3506},
  numpages  = {2},
  year      = {2020},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  isbn      = {9781450379984},
  doi       = {10.1145/3394486.3406703},
  url       = {https://doi.org/10.1145/3394486.3406703},
  keywords  = {distributed deep learning, machine learning},
  abstract  = {Explore new techniques in Microsoft's open source library called DeepSpeed, which advances large model training by improving scale, speed, cost, and usability, unlocking the ability to train 100-billion-parameter models. DeepSpeed is compatible with PyTorch. One piece of our library, called ZeRO, is a new parallelized optimizer that greatly reduces the resources needed for model and data parallelism while massively increasing the number of parameters that can be trained. Researchers have used these breakthroughs to create Turing Natural Language Generation (Turing-NLG), which at the time of its release was the largest publicly known language model at 17 billion parameters. In addition we will also go over our latest transformer kernel advancements that led the DeepSpeed team to achieve the world fastest BERT pretraining record.The Zero Redundancy Optimizer (ZeRO) is a novel memory optimization technology for large-scale distributed deep learning. ZeRO can train deep learning models with over 100 billion parameters on the current generation of GPU clusters at three to five times the throughput of the current best system. It also presents a clear path to training models with trillions of parameters, demonstrating an unprecedented leap in deep learning system technology.DeepSpeed brings state-of-the-art training techniques, such as ZeRO, optimized kernels, distributed training, mixed precision, and checkpointing, through lightweight APIs compatible with PyTorch. With just a few lines of code changes to your PyTorch model, you can leverage DeepSpeed to address underlying performance challenges and boost the speed and scale of your training.},
}
% Research Integrity and Peer Review 8(1), article 4, 2023.
% {ChatGPT} and the first word after the question mark are braced so that
% sentence-casing styles keep their capitals; the journal name is given in its
% proper title-case form.
@article{hosseini2023fighting,
  author    = {Hosseini, Mohammad and Horbach, Serge PJM},
  title     = {Fighting reviewer fatigue or amplifying bias? {Considerations} and recommendations for use of {ChatGPT} and other large language models in scholarly peer review},
  journal   = {Research Integrity and Peer Review},
  volume    = {8},
  number    = {1},
  pages     = {4},
  year      = {2023},
  publisher = {Springer},
}
% arXiv preprint 1910.02054 (cs.LG).
% {ZeRO} is braced so its mixed capitalisation is not reduced to "Zero".
@misc{rajbhandari2020zeromemoryoptimizationstraining,
  author        = {Samyam Rajbhandari and Jeff Rasley and Olatunji Ruwase and Yuxiong He},
  title         = {{ZeRO}: Memory Optimizations Toward Training Trillion Parameter Models},
  year          = {2020},
  eprint        = {1910.02054},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/1910.02054},
}
% arXiv preprint 2211.06398.
@article{zhang2022investigating,
  author  = {Zhang, Jiayao and Zhang, Hongming and Deng, Zhun and Roth, Dan},
  title   = {Investigating fairness disparities in peer review: A language model enhanced approach},
  journal = {arXiv preprint arXiv:2211.06398},
  year    = {2022},
}
% Behavior Research Methods, volume 48, 2016.
@article{nuijten2016prevalence,
  author    = {Nuijten, Mich{\`e}le B and Hartgerink, Chris HJ and Van Assen, Marcel ALM and Epskamp, Sacha and Wicherts, Jelte M},
  title     = {The prevalence of statistical reporting errors in psychology (1985--2013)},
  journal   = {Behavior research methods},
  volume    = {48},
  pages     = {1205--1226},
  year      = {2016},
  publisher = {Springer},
}
% arXiv preprint 1706.03946.
@article{collins2017supervised,
  author  = {Collins, Ed and Augenstein, Isabelle and Riedel, Sebastian},
  title   = {A supervised approach to extractive summarisation of scientific papers},
  journal = {arXiv preprint arXiv:1706.03946},
  year    = {2017},
}
% arXiv preprint 2306.00622.
% The coined name carries its camel-case capitals, and the subtitle after the
% question mark starts with a capital; both are braced against recasing.
@article{liu2023reviewergpt,
  author  = {Liu, Ryan and Shah, Nihar B},
  title   = {{ReviewerGPT}? {An} exploratory study on using large language models for paper reviewing},
  journal = {arXiv preprint arXiv:2306.00622},
  year    = {2023},
}
% arXiv preprint 2310.18852.
% The generational suffix uses the three-part "Last, Jr., First" name form so
% that "Jr." is not treated as part of the surname; {AI} is braced because a
% sentence-casing style would otherwise print "Ai".
@article{yakaboski2023ai,
  author  = {Yakaboski, Chase and Hyde, Gregory and Nyanhongo, Clement and Santos, Jr., Eugene},
  title   = {{AI} for Open Science: A Multi-Agent Perspective for Ethically Translating Data to Knowledge},
  journal = {arXiv preprint arXiv:2310.18852},
  year    = {2023},
}
% Bulletin of Chinese Academy of Sciences (Chinese Version) 39(1), 2024.
% The surname is stored in normal capitalisation (styles that want small caps
% or all caps apply it themselves) and the acronym {AI4R} is braced.
@article{li2024ai4r,
  author    = {Li, Guojie},
  title     = {{AI4R}: The fifth scientific research paradigm},
  journal   = {Bulletin of Chinese Academy of Sciences (Chinese Version)},
  volume    = {39},
  number    = {1},
  pages     = {1--9},
  year      = {2024},
  publisher = {Bulletin of Chinese Academy of Sciences},
}
% arXiv preprint 2311.07361.
% Both authors are organisations, so each name sits in an extra pair of braces
% and is treated as one indivisible name instead of being split into
% surname and given-name parts. {GPT-4} is braced and capitalised.
@article{ai4science2023impact,
  author  = {{Microsoft Research AI4Science} and {Microsoft Azure Quantum}},
  title   = {The impact of large language models on scientific discovery: a preliminary study using {GPT-4}},
  journal = {arXiv preprint arXiv:2311.07361},
  year    = {2023},
}
% Nature 521(7553), 2015.
% The journal name is capitalised, matching the other Nature entries in this file.
@article{lecun2015deep,
  author    = {LeCun, Yann and Bengio, Yoshua and Hinton, Geoffrey},
  title     = {Deep learning},
  journal   = {Nature},
  volume    = {521},
  number    = {7553},
  pages     = {436--444},
  year      = {2015},
  publisher = {Nature Publishing Group UK London},
}
% Nature 625(7995), 2024.
@article{romera2024mathematical,
  author    = {Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Balog, Matej and Kumar, M Pawan and Dupont, Emilien and Ruiz, Francisco JR and Ellenberg, Jordan S and Wang, Pengming and Fawzi, Omar and others},
  title     = {Mathematical discoveries from program search with large language models},
  journal   = {Nature},
  volume    = {625},
  number    = {7995},
  pages     = {468--475},
  year      = {2024},
  publisher = {Nature Publishing Group UK London},
}
% arXiv preprint 2406.10252.
% The system name is braced so its inner capital is kept by sentence-casing styles.
@article{wang2024autosurvey,
  author  = {Wang, Yidong and Guo, Qi and Yao, Wenjin and Zhang, Hongbo and Zhang, Xin and Wu, Zhen and Zhang, Meishan and Dai, Xinyu and Zhang, Min and Wen, Qingsong and others},
  title   = {{AutoSurvey}: Large Language Models Can Automatically Write Surveys},
  journal = {arXiv preprint arXiv:2406.10252},
  year    = {2024},
}
% Nature 624(7990), 2023.
@article{merchant2023scaling,
  author    = {Merchant, Amil and Batzner, Simon and Schoenholz, Samuel S and Aykol, Muratahan and Cheon, Gowoon and Cubuk, Ekin Dogus},
  title     = {Scaling deep learning for materials discovery},
  journal   = {Nature},
  volume    = {624},
  number    = {7990},
  pages     = {80--85},
  year      = {2023},
  publisher = {Nature Publishing Group UK London},
}
% npj Computational Materials 8(1), article 84, 2022.
@article{pyzer2022accelerating,
  author    = {Pyzer-Knapp, Edward O and Pitera, Jed W and Staar, Peter WJ and Takeda, Seiji and Laino, Teodoro and Sanders, Daniel P and Sexton, James and Smith, John R and Curioni, Alessandro},
  title     = {Accelerating materials discovery using artificial intelligence, high performance computing and robotics},
  journal   = {npj Computational Materials},
  volume    = {8},
  number    = {1},
  pages     = {84},
  year      = {2022},
  publisher = {Nature Publishing Group UK London},
}
% bioRxiv preprint, 2024.
% No pages field is recorded: the exported value "2024--07" was not a page
% range (it looks like a fragment of the posting date or preprint identifier
% -- TODO confirm and add the DOI once verified).
@article{hayes2024simulating,
  author    = {Hayes, Tomas and Rao, Roshan and Akin, Halil and Sofroniew, Nicholas J and Oktay, Deniz and Lin, Zeming and Verkuil, Robert and Tran, Vincent Q and Deaton, Jonathan and Wiggert, Marius and others},
  title     = {Simulating 500 million years of evolution with a language model},
  journal   = {bioRxiv},
  year      = {2024},
  publisher = {Cold Spring Harbor Laboratory},
}
% AlphaFold paper, Nature 596(7873), 2021.
% The accented surname is written with proper BibTeX special characters
% (caron on Z, acute on dotless i); {AlphaFold} is braced and the journal name
% is capitalised like the other Nature entries in this file.
@article{jumper2021highly,
  author    = {Jumper, John and Evans, Richard and Pritzel, Alexander and Green, Tim and Figurnov, Michael and Ronneberger, Olaf and Tunyasuvunakool, Kathryn and Bates, Russ and {\v{Z}}{\'\i}dek, Augustin and Potapenko, Anna and others},
  title     = {Highly accurate protein structure prediction with {AlphaFold}},
  journal   = {Nature},
  volume    = {596},
  number    = {7873},
  pages     = {583--589},
  year      = {2021},
  publisher = {Nature Publishing Group},
}
% Chapter in "Readings in artificial intelligence" (Elsevier, 1981).
% The system names are acronyms written in capitals, so they are braced to
% stop sentence-casing styles from printing "Dendral and meta-dendral".
@incollection{buchanan1981dendral,
  author    = {Buchanan, Bruce G and Feigenbaum, Edward A},
  title     = {{DENDRAL} and {Meta-DENDRAL}: Their applications dimension},
  booktitle = {Readings in artificial intelligence},
  pages     = {313--322},
  year      = {1981},
  publisher = {Elsevier},
}
% Proceedings of the AAAI Conference on Artificial Intelligence 38(20), 2024.
@inproceedings{langley2024integrated,
  author    = {Langley, Pat},
  title     = {Integrated Systems for Computational Scientific Discovery},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {38},
  number    = {20},
  pages     = {22598--22606},
  year      = {2024},
}
% MIT Press book, 1987.
% All four authors of the book are listed with full given names so that
% styles can abbreviate or truncate the list themselves.
@book{langley1987scientific,
  author    = {Langley, Pat and Simon, Herbert A. and Bradshaw, Gary L. and Zytkow, Jan M.},
  title     = {Scientific discovery: Computational explorations of the creative processes},
  year      = {1987},
  publisher = {MIT Press},
}
% arXiv preprint 2305.14259.
% The system name is written with its original capitalisation and braced.
@article{wang2023scimon,
  author  = {Wang, Qingyun and Downey, Doug and Ji, Heng and Hope, Tom},
  title   = {{SciMON}: Scientific inspiration machines optimized for novelty},
  journal = {arXiv preprint arXiv:2305.14259},
  year    = {2023},
}
% ICML 2024 (Forty-first International Conference on Machine Learning).
% The benchmark name is braced so its capitals are kept by sentence-casing styles.
@inproceedings{huang2024mlagentbench,
  author    = {Huang, Qian and Vora, Jian and Liang, Percy and Leskovec, Jure},
  title     = {{MLAgentBench}: Evaluating Language Agents on Machine Learning Experimentation},
  booktitle = {Forty-first International Conference on Machine Learning},
  year      = {2024},
}
% arXiv preprint 2309.02726.
@article{yang2023large,
  author  = {Yang, Zonglin and Du, Xinya and Li, Junxian and Zheng, Jie and Poria, Soujanya and Cambria, Erik},
  title   = {Large language models for automated open-domain scientific hypotheses discovery},
  journal = {arXiv preprint arXiv:2309.02726},
  year    = {2023},
}
% arXiv preprint 2408.14033 (cs.AI).
% The system name is braced so sentence-casing styles do not print "Mlr-copilot".
@misc{li2024mlrcopilotautonomousmachinelearning,
  author        = {Ruochen Li and Teerth Patel and Qingyun Wang and Xinya Du},
  title         = {{MLR-Copilot}: Autonomous Machine Learning Research based on Large Language Models Agents},
  year          = {2024},
  eprint        = {2408.14033},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
  url           = {https://arxiv.org/abs/2408.14033},
}
% arXiv preprint 2404.07738.
% The system name is written in camel case and braced.
@article{baek2024researchagent,
  author  = {Baek, Jinheon and Jauhar, Sujay Kumar and Cucerzan, Silviu and Hwang, Sung Ju},
  title   = {{ResearchAgent}: Iterative research idea generation over scientific literature with large language models},
  journal = {arXiv preprint arXiv:2404.07738},
  year    = {2024},
}
% "Deep learning" textbook, MIT Press, 2016.
% Each author appears exactly once in the author list.
@book{goodfellow2016deep,
  author    = {Goodfellow, Ian and Bengio, Yoshua and Courville, Aaron},
  title     = {Deep learning},
  volume    = {1},
  year      = {2016},
  publisher = {MIT Press},
}
% arXiv preprint 2404.01268.
% {LLMs} is capitalised as an acronym and braced against recasing.
@article{liang2024mapping,
  author  = {Liang, Weixin and Zhang, Yaohui and Wu, Zhengxuan and Lepp, Haley and Ji, Wenlong and Zhao, Xuandong and Cao, Hancheng and Liu, Sheng and He, Siyu and Huang, Zhi and others},
  title   = {Mapping the increasing use of {LLMs} in scientific papers},
  journal = {arXiv preprint arXiv:2404.01268},
  year    = {2024},
}
%%%%%% Prompt-based Agents %%%%%%
% arXiv preprint 2408.08435.
@article{hu2024automated,
  author  = {Hu, Shengran and Lu, Cong and Clune, Jeff},
  title   = {Automated design of agentic systems},
  journal = {arXiv preprint arXiv:2408.08435},
  year    = {2024},
}
%%%%%% AI Scientist bib %%%%%%
% arXiv preprint 2408.06292.
% "AI Scientist" is the name of the system, so it is capitalised and braced.
@article{lu2024ai,
  author  = {Lu, Chris and Lu, Cong and Lange, Robert Tjarko and Foerster, Jakob and Clune, Jeff and Ha, David},
  title   = {The {AI Scientist}: Towards fully automated open-ended scientific discovery},
  journal = {arXiv preprint arXiv:2408.06292},
  year    = {2024},
}
% arXiv preprint 2409.04109.
% Acronyms and the first word after the question mark are braced so that
% sentence-casing styles keep their capitals.
@article{si2024can,
  author  = {Si, Chenglei and Yang, Diyi and Hashimoto, Tatsunori},
  title   = {Can {LLMs} Generate Novel Research Ideas? {A} Large-Scale Human Study with 100+ {NLP} Researchers},
  journal = {arXiv preprint arXiv:2409.04109},
  year    = {2024},
}
% arXiv preprint 2408.10365.
% {AI} and {LLMs} are braced; without protection a sentence-casing style would
% print "Ai-driven" and "llms".
@article{tyser2024ai,
  author  = {Tyser, Keith and Segev, Ben and Longhitano, Gaston and Zhang, Xin-Yu and Meeks, Zachary and Lee, Jason and Garg, Uday and Belsten, Nicholas and Shporer, Avi and Udell, Madeleine and others},
  title   = {{AI}-Driven Review Systems: Evaluating {LLMs} in Scalable and Bias-Aware Academic Reviews},
  journal = {arXiv preprint arXiv:2408.10365},
  year    = {2024},
}
% arXiv preprint 2409.14634.
% The acronym {LLM} is braced against style-applied lowercasing.
@article{radensky2024scideator,
  author  = {Radensky, Marissa and Shahid, Simra and Fok, Raymond and Siangliulue, Pao and Hope, Tom and Weld, Daniel S},
  title   = {Scideator: Human-{LLM} Scientific Idea Generation Grounded in Research-Paper Facet Recombination},
  journal = {arXiv preprint arXiv:2409.14634},
  year    = {2024},
}
% arXiv preprint 2409.00102.
@article{taniguchi2024collective,
  author  = {Taniguchi, Tadahiro and Takagi, Shiro and Otsuka, Jun and Hayashi, Yusuke and Hamada, Hiro Taiyo},
  title   = {Collective Predictive Coding as Model of Science: Formalizing Scientific Activities Towards Generative Science},
  journal = {arXiv preprint arXiv:2409.00102},
  year    = {2024},
}
% IJCAI 1977 paper.
% The title carries no trailing period; terminal punctuation is supplied by
% the bibliography style.
@inproceedings{lenat1977automated,
  author    = {Lenat, Douglas B},
  title     = {Automated theory formation in mathematics},
  booktitle = {IJCAI},
  volume    = {77},
  pages     = {833--842},
  year      = {1977},
}
% Artificial Intelligence 21(1--2), 1983.
% {EURISKO} is braced to keep its capitals, and the double issue number is
% written as a range with a double hyphen so it typesets with an en dash.
@article{lenat1983eurisko,
  author    = {Lenat, Douglas B},
  title     = {{EURISKO}: a program that learns new heuristics and domain concepts: the nature of heuristics {III}: program design and results},
  journal   = {Artificial intelligence},
  volume    = {21},
  number    = {1--2},
  pages     = {61--98},
  year      = {1983},
  publisher = {Elsevier},
}
% European Conference on Machine Learning, 2001.
% Springer is recorded in the publisher field (organization is reserved for a
% sponsoring body), and the conference name is given in title case.
@inproceedings{hutter2001towards,
  author    = {Hutter, Marcus},
  title     = {Towards a universal theory of artificial intelligence based on algorithmic probability and sequential decisions},
  booktitle = {European Conference on Machine Learning},
  pages     = {226--238},
  year      = {2001},
  publisher = {Springer},
}
% arXiv preprint 2406.16253.
% The acronyms {LLMs} and {NLP} are capitalised and braced, and the stray
% space inside "(meta-)reviewing" is closed up.
@article{du2024llms,
  author  = {Du, Jiangshu and Wang, Yibo and Zhao, Wenting and Deng, Zhongfen and Liu, Shuaiqi and Lou, Renze and Zou, Henry Peng and Venkit, Pranav Narayanan and Zhang, Nan and Srinath, Mukund and others},
  title   = {{LLMs} assist {NLP} researchers: Critique paper (meta-)reviewing},
  journal = {arXiv preprint arXiv:2406.16253},
  year    = {2024},
}
% Science & Education, 2022.
% The leading umlaut of the first author's surname is written as the BibTeX
% special character {\"O} so it typesets and sorts correctly.
@article{oberg2022teaching,
  author    = {{\"O}berg, Gunilla and Campbell, Alice and Fox, Joanne and Graves, Marcia and Ivanochko, Tara and Matsuchi, Linda and Mouat, Isobel and Welsh, Ashley},
  title     = {Teaching Science as a Process, Not a Set of Facts: A Case-Study of a First-Year Science Seminar},
  journal   = {Science \& Education},
  pages     = {1--31},
  year      = {2022},
  publisher = {Springer},
}
% Journal of the Royal Society of Medicine 99(4), 2006.
% The journal name is given in title case, and the publisher name is kept
% separate from its city, which goes in the address field.
@article{smith2006peer,
  author    = {Smith, Richard},
  title     = {Peer review: a flawed process at the heart of science and journals},
  journal   = {Journal of the Royal Society of Medicine},
  volume    = {99},
  number    = {4},
  pages     = {178--182},
  year      = {2006},
  publisher = {SAGE Publications},
  address   = {London, England},
}
% Research Integrity and Peer Review, volume 3, 2018.
% Stored as an article so that the journal, volume and pages fields are
% actually printed (the misc type ignores them). The dash is the LaTeX
% em-dash ligature rather than a raw Unicode character, and the journal name
% inside the title is braced to keep its capitals.
@article{boughton2018research,
  author    = {Boughton, Stephanie L and Kowalczuk, Maria K and Meerpohl, Joerg J and Wager, Elizabeth and Moylan, Elizabeth C},
  title     = {{Research Integrity and Peer Review}---past highlights and future directions},
  journal   = {Research Integrity and Peer Review},
  volume    = {3},
  pages     = {1--5},
  year      = {2018},
  publisher = {Springer},
}


% NeurIPS 2024 (OpenReview export). Author names use the "Last, First" form.
@inproceedings{pang2024iterative,
  author    = {Pang, Richard Yuanzhe and Yuan, Weizhe and He, He and Cho, Kyunghyun and Sukhbaatar, Sainbayar and Weston, Jason E},
  title     = {Iterative Reasoning Preference Optimization},
  booktitle = {The Thirty-eighth Annual Conference on Neural Information Processing Systems},
  year      = {2024},
  url       = {https://openreview.net/forum?id=4XIKfvNYvx},
}
% arXiv preprint 2407.18690 (cs.AI). Author names use the "Last, First" form.
@misc{yang2024collaborative,
  author        = {Yang, Xu and Chen, Haotian and Feng, Wenjun and Wang, Haoxue and Ye, Zeqi and Shen, Xinjie and Yang, Xiao and Sun, Shizhao and Liu, Weiqing and Bian, Jiang},
  title         = {Collaborative Evolving Strategy for Automatic Data-Centric Development},
  year          = {2024},
  eprint        = {2407.18690},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
}
% Book, 1963, published in London.
% London is the place of publication, so it is stored in the address field.
% NOTE(review): the publisher is believed to be Charles Griffin and Company
% -- confirm against a library record.
@book{david1963method,
  author    = {David, Herbert Aron},
  title     = {The method of paired comparisons},
  volume    = {12},
  year      = {1963},
  publisher = {Charles Griffin \& Company},
  address   = {London},
}
% arXiv preprint 2408.13430 (stat.AP).
% The conference acronym {ICML} is braced against style-applied lowercasing.
@misc{su2024analysisicml2023ranking,
  author        = {Buxin Su and Jiayao Zhang and Natalie Collina and Yuling Yan and Didong Li and Kyunghyun Cho and Jianqing Fan and Aaron Roth and Weijie J. Su},
  title         = {Analysis of the {ICML} 2023 Ranking Data: Can Authors' Opinions of Their Own Papers Assist Peer Review in Machine Learning?},
  year          = {2024},
  eprint        = {2408.13430},
  archivePrefix = {arXiv},
  primaryClass  = {stat.AP},
  url           = {https://arxiv.org/abs/2408.13430},
}
% Nature 620(7972), 2023.
@article{wang2023scientific,
  author    = {Wang, Hanchen and Fu, Tianfan and Du, Yuanqi and Gao, Wenhao and Huang, Kexin and Liu, Ziming and Chandak, Payal and Liu, Shengchao and Van Katwyk, Peter and Deac, Andreea and others},
  title     = {Scientific discovery in the age of artificial intelligence},
  journal   = {Nature},
  volume    = {620},
  number    = {7972},
  pages     = {47--60},
  year      = {2023},
  publisher = {Nature Publishing Group UK London},
}
% Qwen2.5 release blog post (September 2024).
% The team name is a corporate author, so it is double-braced and never split
% into given name and surname. The month uses the standard three-letter macro
% so styles can localise it, and the model name is braced in the title.
@misc{qwen2.5,
  author = {{Qwen Team}},
  title  = {{Qwen2.5}: A Party of Foundation Models},
  month  = sep,
  year   = {2024},
  url    = {https://qwenlm.github.io/blog/qwen2.5/},
}
@misc{dubey2024llama3herdmodels,
  title         = {The {Llama} 3 Herd of Models},
  author        = {Abhimanyu Dubey and Abhinav Jauhri and Abhinav Pandey and Abhishek Kadian and Ahmad Al-Dahle and Aiesha Letman and Akhil Mathur and Alan Schelten and Amy Yang and Angela Fan and
                   Anirudh Goyal and Anthony Hartshorn and Aobo Yang and Archi Mitra and Archie Sravankumar and Artem Korenev and Arthur Hinsvark and Arun Rao and Aston Zhang and Aurelien Rodriguez and
                   Austen Gregerson and Ava Spataru and Baptiste Roziere and Bethany Biron and Binh Tang and Bobbie Chern and Charlotte Caucheteux and Chaya Nayak and Chloe Bi and Chris Marra and
                   Chris McConnell and Christian Keller and Christophe Touret and Chunyang Wu and Corinne Wong and Cristian Canton Ferrer and Cyrus Nikolaidis and Damien Allonsius and Daniel Song and Danielle Pintz and
                   Danny Livshits and David Esiobu and Dhruv Choudhary and Dhruv Mahajan and Diego Garcia-Olano and Diego Perino and Dieuwke Hupkes and Egor Lakomkin and Ehab AlBadawy and Elina Lobanova and
                   Emily Dinan and Eric Michael Smith and Filip Radenovic and Frank Zhang and Gabriel Synnaeve and Gabrielle Lee and Georgia Lewis Anderson and Graeme Nail and Gregoire Mialon and Guan Pang and
                   Guillem Cucurell and Hailey Nguyen and Hannah Korevaar and Hu Xu and Hugo Touvron and Iliyan Zarov and Imanol Arrieta Ibarra and Isabel Kloumann and Ishan Misra and Ivan Evtimov and
                   Jade Copet and Jaewon Lee and Jan Geffert and Jana Vranes and Jason Park and Jay Mahadeokar and Jeet Shah and Jelmer van der Linde and Jennifer Billock and Jenny Hong and
                   Jenya Lee and Jeremy Fu and Jianfeng Chi and Jianyu Huang and Jiawen Liu and Jie Wang and Jiecao Yu and Joanna Bitton and Joe Spisak and Jongsoo Park and
                   Joseph Rocca and Joshua Johnstun and Joshua Saxe and Junteng Jia and Kalyan Vasuden Alwala and Kartikeya Upasani and Kate Plawiak and Ke Li and Kenneth Heafield and Kevin Stone and
                   Khalid El-Arini and Krithika Iyer and Kshitiz Malik and Kuenley Chiu and Kunal Bhalla and Lauren Rantala-Yeary and Laurens van der Maaten and Lawrence Chen and Liang Tan and Liz Jenkins and
                   Louis Martin and Lovish Madaan and Lubo Malo and Lukas Blecher and Lukas Landzaat and Luke de Oliveira and Madeline Muzzi and Mahesh Pasupuleti and Mannat Singh and Manohar Paluri and
                   Marcin Kardas and Mathew Oldham and Mathieu Rita and Maya Pavlova and Melanie Kambadur and Mike Lewis and Min Si and Mitesh Kumar Singh and Mona Hassan and Naman Goyal and
                   Narjes Torabi and Nikolay Bashlykov and Nikolay Bogoychev and Niladri Chatterji and Olivier Duchenne and Onur {\c{C}}elebi and Patrick Alrassy and Pengchuan Zhang and Pengwei Li and Petar Vasic and
                   Peter Weng and Prajjwal Bhargava and Pratik Dubal and Praveen Krishnan and Punit Singh Koura and Puxin Xu and Qing He and Qingxiao Dong and Ragavan Srinivasan and Raj Ganapathy and
                   Ramon Calderer and Ricardo Silveira Cabral and Robert Stojnic and Roberta Raileanu and Rohit Girdhar and Rohit Patel and Romain Sauvestre and Ronnie Polidoro and Roshan Sumbaly and Ross Taylor and
                   Ruan Silva and Rui Hou and Rui Wang and Saghar Hosseini and Sahana Chennabasappa and Sanjay Singh and Sean Bell and Seohyun Sonia Kim and Sergey Edunov and Shaoliang Nie and
                   Sharan Narang and Sharath Raparthy and Sheng Shen and Shengye Wan and Shruti Bhosale and Shun Zhang and Simon Vandenhende and Soumya Batra and Spencer Whitman and Sten Sootla and
                   Stephane Collot and Suchin Gururangan and Sydney Borodinsky and Tamar Herman and Tara Fowler and Tarek Sheasha and Thomas Georgiou and Thomas Scialom and Tobias Speckbacher and Todor Mihaylov and
                   Tong Xiao and Ujjwal Karn and Vedanuj Goswami and Vibhor Gupta and Vignesh Ramanathan and Viktor Kerkez and Vincent Gonguet and Virginie Do and Vish Vogeti and Vladan Petrovic and
                   Weiwei Chu and Wenhan Xiong and Wenyin Fu and Whitney Meers and Xavier Martinet and Xiaodong Wang and Xiaoqing Ellen Tan and Xinfeng Xie and Xuchao Jia and Xuewei Wang and
                   Yaelle Goldschlag and Yashesh Gaur and Yasmine Babaei and Yi Wen and Yiwen Song and Yuchen Zhang and Yue Li and Yuning Mao and Zacharie Delpierre Coudert and Zheng Yan and
                   Zhengxing Chen and Zoe Papakipos and Aaditya Singh and Aaron Grattafiori and Abha Jain and Adam Kelsey and Adam Shajnfeld and Adithya Gangidi and Adolfo Victoria and Ahuva Goldstand and
                   Ajay Menon and Ajay Sharma and Alex Boesenberg and Alex Vaughan and Alexei Baevski and Allie Feinstein and Amanda Kallet and Amit Sangani and Anam Yunus and Andrei Lupu and
                   Andres Alvarado and Andrew Caples and Andrew Gu and Andrew Ho and Andrew Poulton and Andrew Ryan and Ankit Ramchandani and Annie Franco and Aparajita Saraf and Arkabandhu Chowdhury and
                   Ashley Gabriel and Ashwin Bharambe and Assaf Eisenman and Azadeh Yazdan and Beau James and Ben Maurer and Benjamin Leonhardi and Bernie Huang and Beth Loyd and Beto De Paola and
                   Bhargavi Paranjape and Bing Liu and Bo Wu and Boyu Ni and Braden Hancock and Bram Wasti and Brandon Spence and Brani Stojkovic and Brian Gamido and Britt Montalvo and
                   Carl Parker and Carly Burton and Catalina Mejia and Changhan Wang and Changkyu Kim and Chao Zhou and Chester Hu and Ching-Hsiang Chu and Chris Cai and Chris Tindal and
                   Christoph Feichtenhofer and Damon Civin and Dana Beaty and Daniel Kreymer and Daniel Li and Danny Wyatt and David Adkins and David Xu and Davide Testuggine and Delia David and
                   Devi Parikh and Diana Liskovich and Didem Foss and Dingkang Wang and Duc Le and Dustin Holland and Edward Dowling and Eissa Jamil and Elaine Montgomery and Eleonora Presani and
                   Emily Hahn and Emily Wood and Erik Brinkman and Esteban Arcaute and Evan Dunbar and Evan Smothers and Fei Sun and Felix Kreuk and Feng Tian and Firat Ozgenel and
                   Francesco Caggioni and Francisco Guzm{\'a}n and Frank Kanayet and Frank Seide and Gabriela Medina Florez and Gabriella Schwarz and Gada Badeer and Georgia Swee and Gil Halpern and Govind Thattai and
                   Grant Herman and Grigory Sizov and Guangyi Zhang and Guna Lakshminarayanan and Hamid Shojanazeri and Han Zou and Hannah Wang and Hanwen Zha and Haroun Habeeb and Harrison Rudolph and
                   Helen Suk and Henry Aspegren and Hunter Goldman and Ibrahim Damlaj and Igor Molybog and Igor Tufanov and Irina-Elena Veliche and Itai Gat and Jake Weissman and James Geboski and
                   James Kohli and Japhet Asher and Jean-Baptiste Gaya and Jeff Marcus and Jeff Tang and Jennifer Chan and Jenny Zhen and Jeremy Reizenstein and Jeremy Teboul and Jessica Zhong and
                   Jian Jin and Jingyi Yang and Joe Cummings and Jon Carvill and Jon Shepard and Jonathan McPhie and Jonathan Torres and Josh Ginsburg and Junjie Wang and Kai Wu and
                   Kam Hou U and Karan Saxena and Karthik Prasad and Kartikay Khandelwal and Katayoun Zand and Kathy Matosich and Kaushik Veeraraghavan and Kelly Michelena and Keqian Li and Kun Huang and
                   Kunal Chawla and Kushal Lakhotia and Kyle Huang and Lailin Chen and Lakshya Garg and Lavender A and Leandro Silva and Lee Bell and Lei Zhang and Liangpeng Guo and
                   Licheng Yu and Liron Moshkovich and Luca Wehrstedt and Madian Khabsa and Manav Avalani and Manish Bhatt and Maria Tsimpoukelli and Martynas Mankus and Matan Hasson and Matthew Lennie and
                   Matthias Reso and Maxim Groshev and Maxim Naumov and Maya Lathi and Meghan Keneally and Michael L. Seltzer and Michal Valko and Michelle Restrepo and Mihir Patel and Mik Vyatskov and
                   Mikayel Samvelyan and Mike Clark and Mike Macey and Mike Wang and Miquel Jubert Hermoso and Mo Metanat and Mohammad Rastegari and Munish Bansal and Nandhini Santhanam and Natascha Parks and
                   Natasha White and Navyata Bawa and Nayan Singhal and Nick Egebo and Nicolas Usunier and Nikolay Pavlovich Laptev and Ning Dong and Ning Zhang and Norman Cheng and Oleg Chernoguz and
                   Olivia Hart and Omkar Salpekar and Ozlem Kalinli and Parkin Kent and Parth Parekh and Paul Saab and Pavan Balaji and Pedro Rittner and Philip Bontrager and Pierre Roux and
                   Piotr Dollar and Polina Zvyagina and Prashant Ratanchandani and Pritish Yuvraj and Qian Liang and Rachad Alao and Rachel Rodriguez and Rafi Ayub and Raghotham Murthy and Raghu Nayani and
                   Rahul Mitra and Raymond Li and Rebekkah Hogan and Robin Battey and Rocky Wang and Rohan Maheswari and Russ Howes and Ruty Rinott and Sai Jayesh Bondu and Samyak Datta and
                   Sara Chugh and Sara Hunt and Sargun Dhillon and Sasha Sidorov and Satadru Pan and Saurabh Verma and Seiji Yamamoto and Sharadh Ramaswamy and Shaun Lindsay and Sheng Feng and
                   Shenghao Lin and Shengxin Cindy Zha and Shiva Shankar and Shuqiang Zhang and Sinong Wang and Sneha Agarwal and Soji Sajuyigbe and Soumith Chintala and Stephanie Max and Stephen Chen and
                   Steve Kehoe and Steve Satterfield and Sudarshan Govindaprasad and Sumit Gupta and Sungmin Cho and Sunny Virk and Suraj Subramanian and Sy Choudhury and Sydney Goldman and Tal Remez and
                   Tamar Glaser and Tamara Best and Thilo Kohler and Thomas Robinson and Tianhe Li and Tianjun Zhang and Tim Matthews and Timothy Chou and Tzook Shaked and Varun Vontimitta and
                   Victoria Ajayi and Victoria Montanez and Vijai Mohan and Vinay Satish Kumar and Vishal Mangla and V{\'\i}tor Albiero and Vlad Ionescu and Vlad Poenaru and Vlad Tiberiu Mihailescu and Vladimir Ivanov and
                   Wei Li and Wenchen Wang and Wenwen Jiang and Wes Bouaziz and Will Constable and Xiaocheng Tang and Xiaofang Wang and Xiaojian Wu and Xiaolan Wang and Xide Xia and
                   Xilun Wu and Xinbo Gao and Yanjun Chen and Ye Hu and Ye Jia and Ye Qi and Yenda Li and Yilin Zhang and Ying Zhang and Yossi Adi and
                   Youngjin Nam and Yu Wang and Yuchen Hao and Yundi Qian and Yuzi He and Zach Rait and Zachary DeVito and Zef Rosnbrick and Zhaoduo Wen and Zhenyu Yang and
                   Zhiwei Zhao},
  year          = {2024},
  eprint        = {2407.21783},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
  url           = {https://arxiv.org/abs/2407.21783},
}

@inproceedings{xiong2024iterativepreferencelearninghuman,
  author    = {Wei Xiong and Hanze Dong and Chenlu Ye and Ziqi Wang and Han Zhong and Heng Ji and Nan Jiang and Tong Zhang},
  title     = {Iterative Preference Learning from Human Feedback: Bridging Theory and Practice for {RLHF} under {KL}-constraint},
  booktitle = {Forty-first International Conference on Machine Learning},
  year      = {2024},
  url       = {https://openreview.net/forum?id=c1AKcA6ry1},
}


@unpublished{zhu2024safetylock,
  author        = {{Anonymous}},
  title         = {Locking Down the Finetuned {LLMs} Safety},
  note          = {ICLR Paper Submission},
  year          = {2024},
  internal-note = {NOTE(review): appears to be an anonymized duplicate of entry zhu2024locking in this file; confirm and merge the two keys once anonymity is no longer required},
}
@inproceedings{rafailov2023direct,
  author    = {Rafael Rafailov and Archit Sharma and Eric Mitchell and Christopher D Manning and Stefano Ermon and Chelsea Finn},
  title     = {Direct Preference Optimization: Your Language Model is Secretly a Reward Model},
  booktitle = {Thirty-seventh Conference on Neural Information Processing Systems},
  year      = {2023},
  url       = {https://openreview.net/forum?id=HPuSIXJaa9},
}
@misc{liu2024iterativelengthregularizeddirectpreference,
  author        = {Jie Liu and Zhanhui Zhou and Jiaheng Liu and Xingyuan Bu and Chao Yang and Han-Sen Zhong and Wanli Ouyang},
  title         = {Iterative Length-Regularized Direct Preference Optimization: A Case Study on Improving {7B} Language Models to {GPT-4} Level},
  year          = {2024},
  eprint        = {2406.11817},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2406.11817},
}

@inproceedings{meng2024simposimplepreferenceoptimization,
  author    = {Yu Meng and Mengzhou Xia and Danqi Chen},
  title     = {{SimPO}: Simple Preference Optimization with a Reference-Free Reward},
  booktitle = {The Thirty-eighth Annual Conference on Neural Information Processing Systems},
  year      = {2024},
  url       = {https://openreview.net/forum?id=3Tzcot1LKb},
}

@inproceedings{lee2024rlaifvsrlhfscaling,
  author    = {Harrison Lee and Samrat Phatale and Hassan Mansoor and Thomas Mesnard and Johan Ferret and Kellie Ren Lu and Colton Bishop and Ethan Hall and Victor Carbune and Abhinav Rastogi and Sushant Prakash},
  title     = {{RLAIF} vs. {RLHF}: Scaling Reinforcement Learning from Human Feedback with {AI} Feedback},
  booktitle = {Forty-first International Conference on Machine Learning},
  year      = {2024},
  url       = {https://openreview.net/forum?id=uydQ2W41KO},
}
@misc{an2023learning,
  author        = {An, Shengnan and Ma, Zexiong and Lin, Zeqi and Zheng, Nanning and Lou, Jian-Guang and Chen, Weizhu},
  title         = {Learning From Mistakes Makes {LLM} Better Reasoner},
  year          = {2023},
  eprint        = {2310.20689},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2310.20689},
}

@inproceedings{yuan2024self,
  author    = {Weizhe Yuan and Richard Yuanzhe Pang and Kyunghyun Cho and Xian Li and Sainbayar Sukhbaatar and Jing Xu and Jason E Weston},
  title     = {Self-Rewarding Language Models},
  booktitle = {Forty-first International Conference on Machine Learning},
  year      = {2024},
  url       = {https://openreview.net/forum?id=0NphYCmgua},
}
@misc{jiang2023mistral,
  author        = {Jiang, Albert Q and Sablayrolles, Alexandre and Mensch, Arthur and Bamford, Chris and Chaplot, Devendra Singh and de las Casas, Diego and Bressand, Florian and Lengyel, Gianna and Lample, Guillaume and Saulnier, Lucile and others},
  title         = {{Mistral} {7B}},
  year          = {2023},
  eprint        = {2310.06825},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2310.06825},
}
@misc{taniguchi2024collectivepredictivecodingmodel,
  author        = {Tadahiro Taniguchi and Shiro Takagi and Jun Otsuka and Yusuke Hayashi and Hiro Taiyo Hamada},
  title         = {Collective Predictive Coding as Model of Science: Formalizing Scientific Activities Towards Generative Science},
  year          = {2024},
  eprint        = {2409.00102},
  archivePrefix = {arXiv},
  primaryClass  = {physics.soc-ph},
  url           = {https://arxiv.org/abs/2409.00102},
}

@inproceedings{Hu2024AutomatedDO,
  author    = {Shengran Hu and Cong Lu and Jeff Clune},
  title     = {Automated Design of Agentic Systems},
  booktitle = {The Thirteenth International Conference on Learning Representations},
  year      = {2025},
  url       = {https://openreview.net/forum?id=t9U3LW7JVX},
}
@inproceedings{NIPS2017_3f5ee243,
  author    = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, {\L}ukasz and Polosukhin, Illia},
  title     = {Attention Is All You Need},
  booktitle = {Advances in Neural Information Processing Systems},
  editor    = {I. Guyon and U. von Luxburg and S. Bengio and H. Wallach and R. Fergus and S. Vishwanathan and R. Garnett},
  volume    = {30},
  publisher = {Curran Associates, Inc.},
  year      = {2017},
  url       = {https://proceedings.neurips.cc/paper_files/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf},
}

@misc{zhu2024mossenablingcodedrivenevolution,
  author        = {Ming Zhu and Yi Zhou},
  title         = {{MOSS}: Enabling Code-Driven Evolution and Context Management for {AI} Agents},
  year          = {2024},
  eprint        = {2409.16120},
  archivePrefix = {arXiv},
  primaryClass  = {cs.SE},
  url           = {https://arxiv.org/abs/2409.16120},
}
@inproceedings{bao2024fast,
  author    = {Bao, Guangsheng and Zhao, Yanbin and Teng, Zhiyang and Yang, Linyi and Zhang, Yue},
  title     = {{Fast-DetectGPT}: Efficient Zero-Shot Detection of Machine-Generated Text via Conditional Probability Curvature},
  booktitle = {The Twelfth International Conference on Learning Representations},
  year      = {2024},
}
@misc{liu2024towards,
  author        = {Liu, Zhihan and Chai, Yubo and Li, Jianfeng},
  title         = {Towards Fully Autonomous Research Powered by {LLMs}: Case Study on Simulations},
  year          = {2024},
  eprint        = {2408.15512},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2408.15512},
}
@misc{yang2024qwen2,
  author        = {Yang, An and Yang, Baosong and Hui, Binyuan and Zheng, Bo and Yu, Bowen and Zhou, Chang and Li, Chengpeng and Li, Chengyuan and Liu, Dayiheng and Huang, Fei and others},
  title         = {{Qwen2} Technical Report},
  year          = {2024},
  eprint        = {2407.10671},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2407.10671},
}
@inproceedings{lowe2017multi,
  author    = {Lowe, Ryan and Wu, Yi and Tamar, Aviv and Harb, Jean and Abbeel, Pieter and Mordatch, Igor},
  title     = {Multi-Agent Actor-Critic for Mixed Cooperative-Competitive Environments},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {30},
  year      = {2017},
}
